# Load the merged data set; the first row of the CSV supplies column names.
df <- read.csv(file = "merged-rev.csv", header = TRUE, sep = ",")
# Listwise deletion of incomplete rows is intentionally left disabled:
# df <- df[complete.cases(df), ]
# Display the raw data frame for a quick visual check.
df
# Log-transform both outcome measures. log1p(x) computes log(1 + x) and
# stays accurate when x is close to zero, unlike the hand-written log(x + 1).
df$ln_novelty <- log1p(df$novelty)
df$ln_total <- log1p(df$total)
# Treat group as a categorical variable (assigned with `<-` like the rest
# of the script).
df$group <- factor(df$group)
# Display the data frame with the derived columns.
df
# Use group "3" as the reference level so every group coefficient is a
# contrast against it. relevel() leaves the factor unchanged when "3" is
# already the first level, so re-running this chunk is harmless.
df$group <- relevel(df$group, ref = "3")
# Unadjusted model of ln_total on group. It is stored under its own name
# rather than `mod`, because `mod` is later expected to hold the ln_novelty
# model that anova(mod, mod1) compares; reusing the name here would corrupt
# that comparison if chunks are re-run out of order.
mod_total <- lm(ln_total ~ factor(group), data = df)
summary(mod_total)

Call:
lm(formula = ln_total ~ factor(group), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.7373 -0.2143  0.3493  0.8478  1.7667 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)      5.1471     0.1186  43.387  < 2e-16 ***
factor(group)0  -1.0447     0.1653  -6.319 4.99e-10 ***
factor(group)1  -0.4098     0.1634  -2.509 0.012372 *  
factor(group)2  -0.6020     0.1624  -3.706 0.000229 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.438 on 631 degrees of freedom
Multiple R-squared:  0.06168,   Adjusted R-squared:  0.05722 
F-statistic: 13.83 on 3 and 631 DF,  p-value: 9.65e-09
# Keep group "3" as the baseline level (no change if it already is).
df[["group"]] <- relevel(df[["group"]], ref = "3")
# ln_novelty on group, adjusted for the survey items Q7_Q7_1, Q7_Q7_2,
# Q8_Q8_1, Q10 and for count. This object is the reduced model used by
# anova(mod, mod1).
mod <- lm(
  formula = ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 +
    Q10 + count,
  data = df
)
summary(mod)

Call:
lm(formula = ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.72895 -0.10146  0.05141  0.14364  0.30273 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.429485   0.035223  12.193  < 2e-16 ***
factor(group)0 -0.121106   0.024055  -5.035 6.31e-07 ***
factor(group)1 -0.122149   0.023729  -5.148 3.56e-07 ***
factor(group)2 -0.058217   0.023417  -2.486  0.01318 *  
Q7_Q7_1        -0.021111   0.006899  -3.060  0.00231 ** 
Q7_Q7_2         0.028960   0.007017   4.127 4.18e-05 ***
Q8_Q8_1         0.006828   0.007283   0.937  0.34889    
Q10             0.006321   0.010657   0.593  0.55327    
count           0.013205   0.002806   4.706 3.12e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.205 on 610 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.127, Adjusted R-squared:  0.1156 
F-statistic:  11.1 on 8 and 610 DF,  p-value: 1.067e-14
# Keep group "3" as the baseline level (no change if it already is).
df[["group"]] <- relevel(df[["group"]], ref = "3")
# Same covariates as `mod`, plus phase entered as a categorical fixed
# effect. This object is the full model used by anova(mod, mod1).
mod1 <- lm(
  formula = ln_novelty ~ factor(group) + factor(phase) + Q7_Q7_1 +
    Q7_Q7_2 + Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod1)

Call:
lm(formula = ln_novelty ~ factor(group) + factor(phase) + Q7_Q7_1 + 
    Q7_Q7_2 + Q8_Q8_1 + Q10 + count, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.74856 -0.09893  0.05357  0.14560  0.31947 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.412018   0.038061  10.825  < 2e-16 ***
factor(group)0 -0.120733   0.024045  -5.021 6.76e-07 ***
factor(group)1 -0.121842   0.023719  -5.137 3.77e-07 ***
factor(group)2 -0.057849   0.023407  -2.471  0.01373 *  
factor(phase)2  0.002902   0.023359   0.124  0.90117    
factor(phase)3  0.027766   0.023329   1.190  0.23444    
factor(phase)4  0.036093   0.023275   1.551  0.12149    
Q7_Q7_1        -0.021078   0.006896  -3.057  0.00234 ** 
Q7_Q7_2         0.028966   0.007014   4.130 4.13e-05 ***
Q8_Q8_1         0.006883   0.007280   0.945  0.34480    
Q10             0.006380   0.010652   0.599  0.54942    
count           0.013216   0.002808   4.706 3.13e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2049 on 607 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.1321,    Adjusted R-squared:  0.1164 
F-statistic: 8.402 on 11 and 607 DF,  p-value: 7.504e-14
# Nested-model F test: does adding factor(phase) to `mod` improve the fit?
# Both objects must be the ln_novelty models fitted above.
anova(mod, mod1, test = "F")
Analysis of Variance Table

Model 1: ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + 
    count
Model 2: ln_novelty ~ factor(group) + factor(phase) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count
  Res.Df   RSS Df Sum of Sq      F Pr(>F)
1    610 25.63                           
2    607 25.48  3    0.1499 1.1903 0.3126
library(lmerTest)
# Mixed model for ln_novelty: group as a fixed effect and a random
# intercept per phase, fitted by maximum likelihood (REML switched off).
# NOTE(review): the recorded run reports a singular fit with the phase
# standard deviation estimated at 0, and phase has only 4 levels - confirm
# that a random effect is appropriate here.
fit.lmer <- lmer(
  formula = ln_novelty ~ factor(group) + (1 | phase),
  data = df,
  REML = FALSE
)
boundary (singular) fit: see help('isSingular')
# Print the fitted mixed model (fit criteria, random-effect standard
# deviations and fixed-effect estimates).
fit.lmer
Linear mixed model fit by maximum likelihood  ['lmerModLmerTest']
Formula: ln_novelty ~ factor(group) + (1 | phase)
   Data: df
      AIC       BIC    logLik  deviance  df.resid 
-149.4723 -122.7506   80.7362 -161.4723       629 
Random effects:
 Groups   Name        Std.Dev.
 phase    (Intercept) 0.0000  
 Residual             0.2131  
Number of obs: 635, groups:  phase, 4
Fixed Effects:
   (Intercept)  factor(group)0  factor(group)1  factor(group)2  
       0.53572        -0.13948        -0.13047        -0.05857  
optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
# summary() statistics of ln_novelty computed separately for each group.
tapply(X = df[["ln_novelty"]], INDEX = df[["group"]], FUN = summary)
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.3307  0.4855  0.5596  0.5357  0.6162  0.6894 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  0.5206  0.3962  0.6073  0.6858 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.1777  0.5062  0.4053  0.6182  0.6931 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.3871  0.5465  0.4771  0.6084  0.6904 
# summary() statistics of ln_total computed separately for each group.
tapply(X = df[["ln_total"]], INDEX = df[["group"]], FUN = summary)
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  4.331   4.764   5.092   5.147   5.520   5.891 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   3.991   4.830   4.102   5.337   5.869 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   4.553   5.089   4.737   5.580   5.882 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   4.615   4.925   4.545   5.450   5.884 
library(vtree)
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
vtree version 5.6.5 -- For more information, type: vignette("vtree")
# Variable-tree diagram of df by group.
# NOTE(review): presumably shows the number of rows per group level - see
# vignette("vtree") to confirm what is displayed.
vtree(df, "group")
# Variable tree with phase at the first level and group nested below it.
# fillcolor assigns one fill colour per variable; horiz = FALSE requests a
# non-horizontal layout.
vtree(df, c("phase", "group"), 
   fillcolor = c( phase = "#e7d4e8", group = "#99d8c9"),
   horiz = FALSE)
# Keep group "3" as the reference level; relevel() leaves the factor
# unchanged when "3" is already first.
df$group <- relevel(df$group, ref = "3")
# ln_total on group, adjusted for the survey items and count. It is stored
# under its own name rather than `mod`: overwriting `mod` here would leave
# the workspace with `mod` (ln_total) and `mod1` (ln_novelty) fitted to
# different responses, so re-running anova(mod, mod1) afterwards would no
# longer compare nested models.
mod_total_adj <- lm(ln_total ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + count, data=df)
summary(mod_total_adj)

Call:
lm(formula = ln_total ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + 
    Q10 + count, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.6297 -0.2336  0.3334  0.7804  1.9701 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     4.84326    0.23167  20.906  < 2e-16 ***
factor(group)0 -0.98796    0.15821  -6.244 7.97e-10 ***
factor(group)1 -0.42687    0.15607  -2.735 0.006418 ** 
factor(group)2 -0.60248    0.15402  -3.912 0.000102 ***
Q7_Q7_1        -0.19595    0.04537  -4.319 1.83e-05 ***
Q7_Q7_2         0.19658    0.04615   4.260 2.37e-05 ***
Q8_Q8_1        -0.10785    0.04790  -2.251 0.024713 *  
Q10             0.17939    0.07009   2.559 0.010723 *  
count           0.12735    0.01845   6.901 1.29e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.348 on 610 degrees of freedom
  (16 observations deleted due to missingness)
Multiple R-squared:  0.177, Adjusted R-squared:  0.1662 
F-statistic:  16.4 on 8 and 610 DF,  p-value: < 2.2e-16
# Mean ln_total per group, one trace per phase; the y axis starts at 0 and
# ends at the largest observed ln_total.
with(
  df,
  interaction.plot(
    x.factor = group,
    trace.factor = phase,
    response = ln_total,
    ylim = c(0, max(ln_total))
  )
)

# Same plot for ln_novelty.
with(
  df,
  interaction.plot(
    x.factor = group,
    trace.factor = phase,
    response = ln_novelty,
    ylim = c(0, max(ln_novelty))
  )
)

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpkZiA8LSByZWFkLmNzdigibWVyZ2VkLXJldi5jc3YiLCBoZWFkZXIgPVRSVUUsIHNlcD0iLCIpCiNkZiA8LSBkZltjb21wbGV0ZS5jYXNlcyhkZiksIF0gIApkZgpgYGAKCgpgYGB7cn0KZGYkbG5fbm92ZWx0eSA8LSBsb2coZGYkbm92ZWx0eSsxKQpkZiRsbl90b3RhbCA8LSBsb2coZGYkdG90YWwrMSkgCmRmJGdyb3VwID0gZmFjdG9yKGRmJGdyb3VwKQpkZgpgYGAKCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZCA8LSBsbShsbl90b3RhbCB+IGZhY3Rvcihncm91cCksIGRhdGE9ZGYpCnN1bW1hcnkobW9kKQpgYGAKCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZCA8LSBsbShsbl9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCArIGNvdW50LCBkYXRhPWRmKQpzdW1tYXJ5KG1vZCkKYGBgCgpgYGB7cn0KZGYkZ3JvdXAgPC0gcmVsZXZlbChkZiRncm91cCwgcmVmID0gIjMiKQptb2QxIDwtIGxtKGxuX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICsgZmFjdG9yKHBoYXNlKSArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCArIGNvdW50LCBkYXRhPWRmKQpzdW1tYXJ5KG1vZDEpCmBgYAoKYGBge3J9CmFub3ZhKG1vZCwgbW9kMSkKYGBgCgoKYGBge3J9CmxpYnJhcnkobG1lclRlc3QpCmZpdC5sbWVyIDwtIGxtZXIobG5fbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyAoIDEgfCBwaGFzZSksIGRhdGEgPSBkZiwgUkVNTD0gRkFMU0UpCmZpdC5sbWVyCmBgYAoKYGBge3J9CnRhcHBseShkZiRsbl9ub3ZlbHR5LCBkZiRncm91cCwgc3VtbWFyeSkKYGBgCgpgYGB7cn0KdGFwcGx5KGRmJGxuX3RvdGFsLCBkZiRncm91cCwgc3VtbWFyeSkKYGBgCmBgYHtyfQpsaWJyYXJ5KHZ0cmVlKQp2dHJlZShkZiwgImdyb3VwIikKYGBgCmBgYHtyfQp2dHJlZShkZiwgYygicGhhc2UiLCAiZ3JvdXAiKSwgCiAgIGZpbGxjb2xvciA9IGMoIHBoYXNlID0gIiNlN2Q0ZTgiLCBncm91cCA9ICIjOTlkOGM5IiksCiAgIGhvcml6ID0gRkFMU0UpCmBgYAoKCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZCA8LSBsbShsbl90b3RhbCB+IGZhY3Rvcihncm91cCkgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgKyBjb3VudCwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYApgYGB7cn0Kd2l0aChkZiwgaW50ZXJhY3Rpb24ucGxvdChncm91cCwgcGhhc2UsIGxuX3RvdGFsLCB5bGltPWMoMCwgbWF4KGxuX3RvdGFsKSkpKSAjIGludGVyYWN0aW9uIHBsb3QKYGBgCgpgYGB7cn0Kd2l0aChkZiwgaW50ZXJhY3Rpb24ucGxvdChncm91cCwgcGhhc2UsIGxuX25vdmVsdHksIHlsaW09YygwLCBtYXgobG5fbm92ZWx0eSkpKSkgIyBpbnRlcmFjdGlvbiBwbG90
CmBgYAoK